
# Project path configuration; this module reads PATHS.dropbox, PATHS.rawdata,
# PATHS.tables and PATHS.figures.
from init import PATHS
import logging
LOGGER = logging.getLogger(__name__)
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
# NOTE(review): recent matplotlib releases renamed the bundled 'seaborn' style
# (to 'seaborn-v0_8') -- confirm this name still resolves on the installed version.
matplotlib.style.use('seaborn')
import seaborn as sns
# Applied after the matplotlib style above, at import time.
sns.set(style="white")
# Not referenced by any function in the visible part of this file.
pal = sns.color_palette('colorblind')
from C_PatentVariables import conventions_names_colors
# Shared naming/colour conventions, loaded once at import time. The plotting
# functions below index dict_var_colors by column name and use element [0] as
# the legend label and element [1] as the line colour.
dict_subsector_shortnames, dict_var_colors, list_of_subsectors_in_cleancars = conventions_names_colors.main()



def main():
    """Export the patent-code tables, then draw every family-count figure.

    The yearly family-count CSV is loaded from ``PATHS.dropbox`` and indexed by
    ``earliest_filing_year`` (the column itself is kept, since the plotting
    functions filter on it) before being handed to each plotting routine.
    """
    LOGGER.info('Begin c_othergraphs_patents_familylevel.py')
    export_patent_codes_table()

    counts_csv = PATHS.dropbox / 'Data_outputted/C_PatentVariables/TimeSeries_FamilyCounts_in_ipc_cpc_transpo_from_all.csv'
    dfYearLevel = pd.read_csv(counts_csv).set_index('earliest_filing_year', drop=False)

    for draw_figures in (plot_cleancar_techs, plot_bat_vs_fc, plot_clean_grey_dirty):
        draw_figures(dfYearLevel)

    LOGGER.info('END c_othergraphs_patents_familylevel.py')


def export_patent_codes_table():
    """Export the transport patent-code lists as LaTeX tables, one file per type.

    Reads ``GreenCodes/GreenCPC_IPC_codes_fromliterature.xlsx`` from
    ``PATHS.rawdata``, keeps the rows whose ``Sector`` is ``Transport`` or
    ``Electricity Transport`` and whose ``Include`` flag equals 1, relabels
    ``Clean/Grey`` as ``Grey`` and replaces ``'Clean '`` with ``'Clean'`` in
    the ``Type`` column. For each of the types ``Clean``, ``Dirty`` and
    ``Grey`` the sub-sector, code and description columns are written to
    ``PATHS.tables / 'greencodeslist_<type>_forSOM.tex'``.

    Side effects:
        Sets the pandas option ``display.max_colwidth`` to ``None``
        (unlimited) for the rest of the process and writes three ``.tex``
        files encoded as UTF-8.

    Returns:
        None.
    """
    df_table = pd.read_excel(PATHS.rawdata / 'GreenCodes/GreenCPC_IPC_codes_fromliterature.xlsx')
    keep_rows = (
        df_table['Sector'].isin(['Transport', 'Electricity Transport'])
        & (df_table['Include'] == 1)
    )
    # Work on an independent copy so the column assignment below does not
    # target a slice of the frame that was read from disk.
    df_table = df_table[keep_rows].copy()
    # Literal replacements: regex=False keeps the behaviour identical across
    # pandas versions, whose default for `regex` has changed over time.
    df_table['Type'] = (
        df_table['Type']
        .str.replace('Clean/Grey', 'Grey', regex=False)
        .str.replace('Clean ', 'Clean', regex=False)
    )
    df_table = df_table[['Type', 'Sub-sector', 'Code', 'Description']]
    df_table = df_table.sort_values(by=['Type', 'Sub-sector', 'Code'], ascending=True)
    df_table = df_table.fillna('')

    # Dealing with the fact that some sub-sectors have only one code and the
    # lines don't show properly: each such sub-sector gets a second row with
    # the same type and code but an empty description.
    subsector_counts = df_table['Sub-sector'].value_counts()
    listonentry = list(subsector_counts[subsector_counts == 1].index)
    newdf_table = []
    for oneentryonly in listonentry:
        only_row = df_table[df_table['Sub-sector'] == oneentryonly].iloc[0]
        filler_row = pd.DataFrame([{
            'Type': only_row['Type'],
            'Sub-sector': oneentryonly,
            'Code': only_row['Code'],
            'Description': '',
        }])
        # pd.concat replaces DataFrame.append, which no longer exists in
        # pandas 2.x; ignore_index=True renumbers the rows as append did.
        df_table = pd.concat([df_table, filler_row], ignore_index=True)
        newdf_table.append(
            df_table[df_table['Sub-sector'] == oneentryonly]
            .sort_values(by=['Sub-sector', 'Code'], ascending=False)
        )
    newdf_table.append(
        df_table[~df_table['Sub-sector'].isin(listonentry)]
        .sort_values(by=['Sub-sector', 'Code'], ascending=True)
    )
    newdf_table = pd.concat(newdf_table)
    newdf_table = newdf_table.sort_values(by=['Type', 'Sub-sector'], ascending=True)

    # None means "no truncation"; the former sentinel -1 is deprecated since
    # pandas 1.0 and rejected by later releases.
    pd.set_option('display.max_colwidth', None)
    for type_i in ['Clean', 'Dirty', 'Grey']:
        df_table_sub = newdf_table[newdf_table['Type'] == type_i].drop(columns='Type')
        # All three remaining columns go into the index so that
        # multirow=True can merge repeated sub-sector/code cells.
        df_table_sub = df_table_sub.set_index(['Sub-sector', 'Code', 'Description'])
        # NOTE(review): "L{12cm}" is not a built-in LaTeX column type --
        # presumably defined in the consuming document's preamble; confirm.
        df_table_tex = df_table_sub.to_latex(index=True, column_format="llL{12cm}", multirow=True)
        # Explicit encoding so the output does not depend on the platform's
        # default locale encoding.
        with open(PATHS.tables / f'greencodeslist_{type_i}_forSOM.tex', "w", encoding="utf-8") as f:
            f.write(df_table_tex)


def plot_cleancar_techs(dfYearLevel):
    """Plot yearly family counts per clean-car technology, linear and log scale.

    Rows whose ``earliest_filing_year`` lies in 1970-2017 are plotted against
    the frame's index. Four PNG files are written under
    ``PATHS.figures / 'family_counts_over_time'``:

    * ``cleancar_tech.png`` / ``cleancar_tech_log.png`` for the plain
      ``Count_*`` columns;
    * ``cleancar_tech_excl.png`` / ``cleancar_tech_excl_log.png`` for the
      ``*_excl`` columns plus ``Count_bothbatfc``.

    Args:
        dfYearLevel: DataFrame holding an ``earliest_filing_year`` column and
            one column per plotted variable. Legend label and line colour of
            each variable come from ``dict_var_colors[var][0]`` and
            ``dict_var_colors[var][1]``.

    Returns:
        None.
    """
    # The year filter is identical for every line, so apply it once.
    df_years = dfYearLevel[dfYearLevel['earliest_filing_year'].isin(range(1970, 2018))]

    def _save_linear_and_log(variables, stem, zero_floor):
        """Draw ``variables`` on a new figure; save ``<stem>.png`` and ``<stem>_log.png``."""
        fig, ax = plt.subplots()
        for var in variables:
            ax.plot(df_years[var], label=dict_var_colors[var][0], color=dict_var_colors[var][1], linewidth=3)
        ax.legend(ncol=2)
        if zero_floor:
            ax.set_ylim(0)
        ax.set_xlabel('Earliest Filing Year')
        ax.set_ylabel('Number of DocDB Families')
        ax.set_xticks(list(range(1970, 2018, 10)))
        fig.savefig(PATHS.figures / f'family_counts_over_time/{stem}.png')
        ax.set_yscale('log')
        if zero_floor:
            # NOTE(review): a lower limit of 0 is not representable on a log
            # axis and matplotlib warns about it. The call is kept so the
            # saved figure stays exactly as before -- confirm the intent.
            ax.set_ylim(0)
        fig.savefig(PATHS.figures / f'family_counts_over_time/{stem}_log.png')
        plt.close(fig)

    # Non Exclusive
    _save_linear_and_log(
        ['Count_Bat', 'Count_FC', 'Count_HV', 'Count_EV', 'Count_H2', 'Count_Enab', 'Count_Stor'],
        'cleancar_tech',
        zero_floor=True,
    )
    # Exclusive
    _save_linear_and_log(
        ['Count_Bat_excl', 'Count_FC_excl', 'Count_HV_excl', 'Count_EV_excl', 'Count_H2_excl',
         'Count_Enab_excl', 'Count_Stor_excl', 'Count_bothbatfc'],
        'cleancar_tech_excl',
        zero_floor=False,
    )


def plot_bat_vs_fc(dfYearLevel):
    """Plot the ``Count_Bat*`` and ``Count_FC*`` family counts over time.

    Rows whose ``earliest_filing_year`` lies in 1970-2017 are plotted against
    the frame's index. Three variable sets are drawn, each saved on a linear
    and on a log y-axis under ``PATHS.figures / 'family_counts_over_time'``:
    ``bat_vs_fc``, ``bat_vs_fc_excl`` and ``bat_vs_fc_excl_wH2`` (with a
    ``_log`` suffix for the log-scale version).

    Args:
        dfYearLevel: DataFrame holding an ``earliest_filing_year`` column and
            one column per plotted variable. Legend label and line colour of
            each variable come from ``dict_var_colors[var][0]`` and
            ``dict_var_colors[var][1]``.

    Returns:
        None.
    """
    # The year filter is identical for every line, so apply it once.
    df_years = dfYearLevel[dfYearLevel['earliest_filing_year'].isin(range(1970, 2018))]

    # (columns to draw, output file stem), in the order the figures are produced.
    figure_specs = (
        # Non Exclusive
        (['Count_Bat', 'Count_FC'], 'bat_vs_fc'),
        # Exclusive
        (['Count_Bat_excl', 'Count_FC_excl', 'Count_bothbatfc'], 'bat_vs_fc_excl'),
        # Exclusive w H2
        (['Count_Bat_excl_wH2', 'Count_FC_excl_wH2', 'Count_bothbatfc_wH2'], 'bat_vs_fc_excl_wH2'),
    )
    for variables, stem in figure_specs:
        fig, ax = plt.subplots()
        for var in variables:
            ax.plot(df_years[var], label=dict_var_colors[var][0], color=dict_var_colors[var][1], linewidth=3)
        ax.legend(ncol=1)
        ax.set_xlabel('Earliest Filing Year')
        ax.set_ylabel('Number of DocDB Families')
        ax.set_xticks(list(range(1970, 2018, 10)))
        fig.savefig(PATHS.figures / f'family_counts_over_time/{stem}.png')
        # Same figure again on a log y-axis.
        ax.set_yscale('log')
        fig.savefig(PATHS.figures / f'family_counts_over_time/{stem}_log.png')
        plt.close(fig)


def plot_clean_grey_dirty(dfYearLevel):
    """Plot the ``CountClean``/``CountGrey``/``CountDirty`` family counts over time.

    Rows whose ``earliest_filing_year`` lies in 1970-2017 are plotted against
    the frame's index with the y-axis starting at 0. Two PNG files are
    written under ``PATHS.figures / 'family_counts_over_time'``:
    ``clean_grey_dirty_transport.png`` for the plain columns and
    ``clean_grey_dirty_excl_transport.png`` for the ``*_excl`` columns.

    Args:
        dfYearLevel: DataFrame holding an ``earliest_filing_year`` column and
            one column per plotted variable. Legend label and line colour of
            each variable come from ``dict_var_colors[var][0]`` and
            ``dict_var_colors[var][1]``.

    Returns:
        None.
    """
    # The year filter is identical for every line, so apply it once.
    df_years = dfYearLevel[dfYearLevel['earliest_filing_year'].isin(range(1970, 2018))]

    # (columns to draw, output file name), in the order the figures are produced.
    figure_specs = (
        # Non Exclusive
        (['CountClean', 'CountGrey', 'CountDirty'], 'clean_grey_dirty_transport.png'),
        # Exclusive
        (['CountClean_excl', 'CountGrey_excl', 'CountDirty_excl'], 'clean_grey_dirty_excl_transport.png'),
    )
    for variables, filename in figure_specs:
        fig, ax = plt.subplots()
        for var in variables:
            # Progress trace goes to the module logger rather than stdout.
            LOGGER.debug('Plotting %s', var)
            ax.plot(df_years[var], label=dict_var_colors[var][0], color=dict_var_colors[var][1], linewidth=3)
        ax.legend(ncol=1)
        ax.set_ylim(0)
        ax.set_xlabel('Earliest Filing Year')
        ax.set_ylabel('Number of DocDB Families')
        ax.set_xticks(list(range(1970, 2018, 10)))
        fig.savefig(PATHS.figures / f'family_counts_over_time/{filename}')
        plt.close(fig)

